from pyspark.sql import SparkSession

# Driver
from pyspark.sql.functions import when, col
from pyspark.sql.types import DoubleType

# Build (or reuse) a SparkSession named 'HelloSpark' running on a local master.
spark = (
    SparkSession.builder
    .master('local')
    .appName('HelloSpark')
    .getOrCreate()
)

# Load the Beijing PM2.5 CSV: the first row is treated as the header and
# column types are inferred by Spark.
# The path uses a forward slash on purpose: a backslash before 'B' is an
# invalid escape sequence in a normal string literal (it triggers a warning on
# modern Python) and is not a directory separator outside Windows.
df = (
    spark.read
    .option('header', True)
    .option('inferSchema', True)
    .csv('dataset/BeijingPM20100101_20151231.csv')
)

# Print up to 500 rows of No/year/month plus a derived 'pm' column:
# rows where PM_Dongsi equals the string 'NA' become 0, every other value is
# cast to double.
df.select(
    'No',
    'year',
    'month',
    when(col('PM_Dongsi') == 'NA', 0)
    .otherwise(col('PM_Dongsi').cast(DoubleType()))
    .alias('pm'),
).show(500)

# 3. replace: swap the string 'NA' for 'NaN' in the PM_Dongsi column only.
# `subset` must be passed by keyword: the second positional parameter of
# na.replace() is `value`, which is ignored when `to_replace` is a dict, so a
# positional column name would leave the replacement applied to every column.
df.na.replace({'NA': 'NaN'}, subset=['PM_Dongsi']).show(truncate=False)
